rm(list=ls())
library(data.table)
library(qte)
library(ggplot2)
library(pracma)
library(dplyr)
library(haven)
library(Counterfactual)

dir <- ""
dat <- read_dta(paste(dir, "zoc_analysis_data_ela.dta", sep=""))
dat <- as.data.table(dat)
dat <- dat[endyear>=2008,]
dat <- as.data.frame(dat)

################################################################################
### Post years 
zoc_students <- dat[which(dat$endyear>=2013),]
zoc_students <-subset(zoc_students, zoc_students$analysis_schools==1)
# collapse to school level (note ATE is constant within schools)
school <- zoc_students %>% 
  group_by(preferredlocationcode, zoc_hs, endyear) %>%
  summarize(ATE=mean(ATE, na.rm = TRUE),
            hispanic=mean(trend_hispanic_1, na.rm=TRUE),
            black=mean(trend_black_1, na.rm=TRUE),
            poverty=mean(trend_poverty_1, na.rm=TRUE),
            migrant=mean(trend_migrant_1, na.rm=TRUE),
            spanish_at_home=mean(trend_spanish_at_home_1, na.rm=TRUE),
            college=mean(trend_college_1,na.rm=TRUE),
            obs = n())


cf_1 <- counterfactual(ATE ~ 
                         poverty + hispanic  + black + migrant +college + female + spanish_at_home
                     ,
                     quantiles=seq(5,95, by=5)/100,
                     data=school,
                     group=zoc_hs,
                     treatment=TRUE,
                     decomposition = TRUE,
                     weights=obs,
                     method="lpm")  

cf_data1 <- as.data.frame(cf_1$resTE)
cf_data1$uci <- cf_data1$total.effect + 1.96*cf_data1$se.te 
cf_data1$lci <- cf_data1$total.effect - 1.96*cf_data1$se.te 
cf_data1$quantile <- seq(5,95, by=5)/100


################################################################################
### Pre ###
zoc_students <- dat[which(dat$endyear<=2012),]
zoc_students <-subset(zoc_students, zoc_students$analysis_schools==1)
zoc_students$notreat <- 1 - zoc_students$zoc_hs
# collapse to school level (note ATE is constant within schools)
school <- zoc_students %>% 
  group_by(preferredlocationcode, zoc_hs, endyear) %>%
  summarize(ATE=mean(ATE, na.rm = TRUE),
            hispanic=mean(trend_hispanic_1, na.rm=TRUE),
            black=mean(trend_black_1, na.rm=TRUE),
            poverty=mean(trend_poverty_1, na.rm=TRUE),
            migrant=mean(trend_migrant_1, na.rm=TRUE),
            spanish_at_home=mean(trend_spanish_at_home_1, na.rm=TRUE),
            college=mean(trend_college_1,na.rm=TRUE),
            obs = n())


cf_0 <- counterfactual(ATE ~ 
                         poverty + hispanic  + black + migrant +college + female + spanish_at_home
                     ,
                     quantiles=seq(5,95, by=5)/100,
                     data=school,
                     group=zoc_hs,
                     treatment=TRUE,
                     decomposition = TRUE,
                     weights=obs,
                     method="lpm")  

cf_data0 <- as.data.frame(cf_0$resTE)

cf_data0$uci <- cf_data0$total.effect + 1.96*cf_data0$se.te 
cf_data0$lci <- cf_data0$total.effect - 1.96*cf_data0$se.te 
cf_data0$quantile <- seq(5,95, by=5)/100


cf_dd <- cf_data1
cf_dd$te <- cf_dd$total.effect - cf_data0$total.effect
cf_dd$te_se <- sqrt(cf_dd$se.te^2 + cf_data0$se.te^2)

cf_dd$uci <- cf_dd$te + 1.96*cf_dd$te_se 
cf_dd$lci <- cf_dd$te - 1.96*cf_dd$te_se 
cf_dd$quantile <- seq(5,95, by=5)/100

write.csv(cf_dd, paste(dir, "/zoc_va_qte.csv", sep=""))
